# Lookup vectors used to filter the data and to group countries by continent.

# Individual countries kept in the analysis.
mycountries <- c(
  "Bolivia", "Brazil", "Bulgaria", "Canada", "Chile", "United States",
  "India", "Pakistan", "Nepal", "Belgium", "Albania", "Denmark",
  "Czech Republic", "Finland", "France", "Germany", "Ireland", "Italy",
  "Iceland", "Netherlands", "Poland", "Norway", "Spain", "Switzerland",
  "Turkey", "Ukraine", "Sweden", "Russian Federation", "Portugal",
  "United Kingdom", "Israel", "Saudi Arabia", "United Arab Emirates",
  "Libya", "Egypt, Arab Rep.", "American Samoa", "Cambodia", "China",
  "Japan", "Mongolia", "Malaysia", "New Caledonia", "Marshall Islands",
  "Indonesia", "Fiji", "Guam", "Australia", "Congo, Dem. Rep.",
  "Central African Republic", "Congo, Rep.", "Nigeria", "Liberia", "Kenya",
  "Madagascar", "Mali", "Rwanda", "Somalia", "South Sudan", "Uganda",
  "Zimbabwe", "Sudan", "South Africa", "Sierra Leone", "Ethiopia", "Ghana",
  "Botswana", "Angola", "Cabo Verde", "Cameroon", "Comoros", "Seychelles",
  "Niger", "Zambia", "Uruguay", "Venezuela, RB", "Panama", "Mexico", "Peru",
  "Paraguay", "Nicaragua", "Jamaica", "Guatemala", "El Salvador", "Haiti",
  "Dominican Republic", "Ecuador", "Honduras", "Colombia", "Cuba",
  "Costa Rica", "Argentina", "Belize"
)
#unique(WED_Agg$`Country Name`)

# Regional aggregate rows (matched against `Country Name` as well).
Aggs <- c(
  "North America",
  "Middle East & North Africa",
  "European Union",
  "Africa Eastern and Southern",
  "Africa Western and Central",
  "East Asia & Pacific",
  "Latin America & Caribbean"
)

# Continent membership; every entry of `mycountries` appears in exactly one
# of the vectors below.
North_America <- c(
  "Canada", "United States", "Mexico", "Jamaica", "Haiti", "Cuba",
  "Costa Rica", "Dominican Republic"
)
Europe <- c(
  "Bulgaria", "Belgium", "Albania", "Denmark", "Czech Republic", "Finland",
  "France", "Germany", "Ireland", "Italy", "Iceland", "Netherlands",
  "Poland", "Norway", "Spain", "Switzerland", "Turkey", "Ukraine", "Sweden",
  "Portugal", "United Kingdom"
)
Africa <- c(
  "Libya", "Egypt, Arab Rep.", "Congo, Dem. Rep.",
  "Central African Republic", "Congo, Rep.", "Nigeria", "Liberia", "Kenya",
  "Madagascar", "Mali", "Rwanda", "Somalia", "South Sudan", "Uganda",
  "Zimbabwe", "Sudan", "South Africa", "Sierra Leone", "Ethiopia", "Ghana",
  "Botswana", "Angola", "Cabo Verde", "Cameroon", "Comoros", "Seychelles",
  "Niger", "Zambia"
)
Asia <- c(
  "India", "Pakistan", "Nepal", "Russian Federation", "Israel",
  "Saudi Arabia", "United Arab Emirates", "Cambodia", "China", "Japan",
  "Mongolia"
)
South_America <- c(
  "Bolivia", "Brazil", "Chile", "Uruguay", "Venezuela, RB", "Panama", "Peru",
  "Paraguay", "Nicaragua", "Guatemala", "El Salvador", "Ecuador", "Honduras",
  "Colombia", "Argentina", "Belize"
)
Oceania <- c(
  "American Samoa", "Malaysia", "New Caledonia", "Marshall Islands",
  "Indonesia", "Fiji", "Guam", "Australia"
)
# Use this R-Chunk to import all your datasets!

# Economic indicators.
WED <- read_csv("world_econ_dat.csv")

# Mortality and population, kept only for rows whose `Country Name` is one of
# the selected countries or one of the regional aggregates.
POP <- read_csv("population.csv") %>%
  filter(`Country Name` %in% c(mycountries, Aggs))
For this project I wanted to try to recreate some of the animated charts I’ve seen from Gapminder and other economics sources. I tried using both gganimate and plotly to do this in different ways. I’ve included two plots for each option, as well as a sample of the final data.
Reading in this data was not too complicated, and getting the data I wanted was fairly easy through the World Bank. The incoming column names are a bit long, and the orientation of the data was a mix between long and wide. It took me a few tries (I spent about 6 hours on this part) to reshape it to better fit my needs. I feel significantly more comfortable with pivots now. I learned how to pivot multiple values at a time, as well as how to use regex within strings to rename and remove some sections of my names.
# Append population and mortality data to Economic indicators (without forecast years)
# Only the first 54 columns of POP are kept so that its columns line up with
# WED for rbind().
Full_dat <- rbind(POP[, 1:54], WED)

# Reshape helpers shared by the aggregate table and the per-country table, so
# both go through exactly the same steps.

# Stack every non-identifier column into `year` / `value` pairs and drop
# `Series Code`. The column is removed by name rather than by position, so the
# step does not depend on the column order of the input.
indicators_to_long <- function(dat) {
  dat %>%
    pivot_longer(
      !c(`Country Name`, `Country Code`, `Series Name`, `Series Code`),
      names_to = "year",
      values_to = "value"
    ) %>%
    select(!`Series Code`)
}

# Spread the indicators into one column per `Series Name`, keep the first four
# characters of the year label as an integer year, and turn the ".."
# placeholder cells into NA.
indicators_to_wide <- function(long_dat) {
  wide_dat <- long_dat %>%
    pivot_wider(names_from = `Series Name`, values_from = value)
  wide_dat$year <- as.integer(substr(wide_dat$year, 1, 4))
  replace(wide_dat, wide_dat == "..", NA)
}

# Aggregate data
Full_Agg <- Full_dat %>% filter(`Country Name` %in% Aggs)
Full_Agg_Long <- indicators_to_long(Full_Agg)
Full_Agg_Wide <- indicators_to_wide(Full_Agg_Long)

# Full data
# Note: Full_dat is overwritten here with the country-only subset.
Full_dat <- Full_dat %>% filter(`Country Name` %in% mycountries)
Full_dat_Long <- indicators_to_long(Full_dat)
Full_dat_Wide <- indicators_to_wide(Full_dat_Long)
# Label each row with the continent whose lookup vector contains its
# `Country Name`; names found in none of the vectors get NA.
Full_dat_Wide <- Full_dat_Wide %>%
  mutate(
    Continent = case_when(
      `Country Name` %in% North_America ~ "North America",
      `Country Name` %in% South_America ~ "South America",
      `Country Name` %in% Africa ~ "Africa",
      `Country Name` %in% Asia ~ "Asia",
      `Country Name` %in% Europe ~ "Europe",
      `Country Name` %in% Oceania ~ "Oceania"
    )
  )
# Change column data Types
# Indicator columns become numeric, the two grouping columns become factors,
# and GDP is additionally stored in millions as a new column. The original
# `GDP (constant 2015 US$)` column is left as it is.
Full_dat_Wide <- Full_dat_Wide %>%
  mutate(
    across(
      all_of(c(
        "Inflation, GDP deflator (annual %)",
        "Gross savings (% of GDP)",
        "Gross domestic savings (% of GDP)",
        "GDP per capita (constant 2015 US$)",
        "GDP growth (annual %)",
        "Current health expenditure (% of GDP)",
        "Central government debt, total (% of GDP)",
        "Unemployment, total (% of total labor force) (national estimate)",
        "GDP per capita growth (annual %)",
        "Population, total",
        "Mortality rate, under-5 (per 1,000)"
      )),
      as.numeric
    ),
    `GDP (constant 2015 US$ Millions)` =
      as.numeric(`GDP (constant 2015 US$)`) / 1000000,
    `Country Name` = as.factor(`Country Name`),
    Continent = as.factor(Continent)
  )

# Print a compact overview of the converted table.
glimpse(Full_dat_Wide, width = 60)
## Rows: 4,600
## Columns: 17
## $ `Country Name` <fct> ~
## $ `Country Code` <chr> ~
## $ year <int> ~
## $ `Mortality rate, under-5 (per 1,000)` <dbl> ~
## $ `Population, total` <dbl> ~
## $ `Inflation, GDP deflator (annual %)` <dbl> ~
## $ `Gross savings (% of GDP)` <dbl> ~
## $ `Gross domestic savings (% of GDP)` <dbl> ~
## $ `GDP per capita (constant 2015 US$)` <dbl> ~
## $ `GDP growth (annual %)` <dbl> ~
## $ `GDP (constant 2015 US$)` <chr> ~
## $ `Current health expenditure (% of GDP)` <dbl> ~
## $ `Central government debt, total (% of GDP)` <dbl> ~
## $ `Unemployment, total (% of total labor force) (national estimate)` <dbl> ~
## $ `GDP per capita growth (annual %)` <dbl> ~
## $ Continent <fct> ~
## $ `GDP (constant 2015 US$ Millions)` <dbl> ~
# Interactive preview of the first 500 rows of the final table.
datatable(
  Full_dat_Wide[1:500, ],
  extensions = "Responsive",
  options = list(lengthMenu = c(3, 5, 10))
)
# Use this R-Chunk to clean & wrangle your data!
# Splits WED into an aggregate part and a country part by row position, then
# reshapes the aggregate part long -> wide and tidies its column names.

# Drop the fourth column by position.
# NOTE(review): presumably `Series Code` -- confirm against the CSV header.
WED <- WED[, c(-4)]
# Rows 1-80 are treated as the aggregate rows, everything after as countries.
# NOTE(review): this relies on the row order of the input file -- verify.
WED_Agg <- WED[1:80, ]
WED_C <- WED[-1:-80, ]
# Plain copy of the country rows.
WED_C_If <- WED_C
#Remove unwanted data, reshape.
# Rows 21-30 of the aggregate slice are discarded.
WED_Agg <- WED_Agg[-21:-30, ]
# NOTE(review): only `Country Name` and `Series Name` are excluded here, so any
# other identifier column still present (e.g. `Country Code`) would be pivoted
# together with the year columns -- confirm this is intended.
WED_Agg_Long <- WED_Agg %>% pivot_longer(!c(`Country Name`, `Series Name`), names_to = "year", values_to = "percent")
# One column per `Series Name`; values come from every column that is not
# `Series Name`, `Country Name` or `year`.
WED_Agg_Wide <- WED_Agg_Long %>% pivot_wider(names_from = c(`Series Name`), values_from = !c(`Series Name`, `Country Name`,`year`))
#Fix Names
names_str <- names(WED_Agg_Wide)
# Three passes over the column names:
#   1. replace the first run of whitespace with a single "_";
#   2. collapse everything from the first "_" through the last "_" into one
#      "_" (greedy match; only applies to names that contain at least two
#      underscores at this point);
#   3. replace every remaining whitespace character with "_".
names_string <- str_replace(names_str, pattern = "\\s+", replacement = "_") %>%
str_replace(pattern = ("[_].*_"), replacement = "_") %>%
str_replace_all(pattern = "\\s", replacement = "_")
WED_Agg_Wide <- setNames(WED_Agg_Wide,names_string)
#names(WED_Agg_Wide)
#Fix Data types
# Cells equal to ".." become NA.
WED_Agg_Wide<-replace(WED_Agg_Wide, WED_Agg_Wide == "..",NA)
# Keep the first four characters of the year label and store them as a number.
WED_Agg_Wide$year <- as.numeric(substr(WED_Agg_Wide$year, 1,4))
# change column types
# Indicator columns become numeric, GDP is rescaled in place to millions, and
# the country column becomes a factor.
WED_Agg_Wide <- WED_Agg_Wide %>%
  mutate(
    across(
      all_of(c(
        "Inflation,_GDP_deflator_(annual_%)",
        "Gross_savings_(%_of_GDP)",
        "Gross_domestic_savings_(%_of_GDP)",
        "GDP_per_capita_(constant_2015_US$)",
        "GDP_growth_(annual_%)",
        "Current_health_expenditure_(%_of_GDP)",
        "Central_government_debt,_total_(%_of_GDP)",
        "Unemployment,_total_(%_of_total_labor_force)_(national_estimate)",
        "GDP_per_capita_growth_(annual_%)"
      )),
      as.numeric
    ),
    `GDP_(constant_2015_US$)` = as.numeric(`GDP_(constant_2015_US$)`) / 1000000,
    Country_Name = as.factor(Country_Name)
  )
#
# Typed copy of the long aggregate table: ".." cells become NA, `year` and
# `percent` become numeric, and the two name columns become factors.
WED_Agg_Long_p <- replace(WED_Agg_Long, WED_Agg_Long == "..", NA) %>%
  mutate(
    year = as.numeric(substr(year, 1, 4)),
    percent = as.numeric(percent),
    `Country Name` = as.factor(`Country Name`),
    `Series Name` = as.factor(`Series Name`)
  ) %>%
  # Drop the two listed series.
  # NOTE(review): the second name ends in "US$ Millions", which does not match
  # any series name used elsewhere in this file -- confirm it is the intended
  # series.
  filter(
    `Series Name` != "GDP (constant 2015 US$)",
    `Series Name` != "GDP per capita (constant 2015 US$ Millions)"
  )
# Animated bubble chart (gganimate) for the aggregate regions, years after
# 1987: gross savings (log10 x-axis) against under-5 mortality, point size by
# total population, one frame sequence over `year`.
Full_Agg_Wide1 <- filter(Full_Agg_Wide, year > 1987)

p <- ggplot(
  data = Full_Agg_Wide1,
  mapping = aes(
    x = as.numeric(`Gross savings (% of GDP)`),
    y = as.numeric(`Mortality rate, under-5 (per 1,000)`),
    size = as.numeric(`Population, total`),
    colour = `Country Name`
  )
) +
  geom_point(alpha = 0.7, show.legend = FALSE) +
  scale_color_viridis_d() +
  scale_size(range = c(2, 12)) +
  scale_x_log10() +
  labs(
    x = "Gross savings (% of GDP)",
    y = "Mortality rate, under-5 (per 1,000)",
    size = "Population in Millions"
  ) +
  # Animation layers: step through the years, show the current year in the
  # title, and leave a short trail behind each point.
  transition_time(Full_Agg_Wide1$year) +
  labs(title = "Year: {frame_time}") +
  shadow_wake(wake_length = 0.2, alpha = FALSE)

animate(p, fps = 4)
# Animated bubble chart (gganimate) for the aggregate regions, years after
# 1987: GDP per capita (log10 x-axis) against under-5 mortality, point size by
# total population in millions, coloured by region.
Full_Agg_Wide1 <- Full_Agg_Wide %>% filter(year > 1987)

p <- ggplot(
  Full_Agg_Wide1,
  aes(
    x = as.numeric(`GDP per capita (constant 2015 US$)`),
    y = as.numeric(`Mortality rate, under-5 (per 1,000)`),
    size = as.numeric(`Population, total`) / 1000000,
    colour = `Country Name`
  )
) +
  geom_point(show.legend = TRUE, alpha = 0.7) +
  scale_color_viridis_d() +
  scale_size(range = c(2, 12)) +
  scale_x_log10() +
  # The x-axis label names the variable that is actually plotted (it used to
  # read "Gross savings (% of GDP)").
  labs(
    x = "GDP per capita (constant 2015 US$)",
    y = "Mortality rate, under-5 (per 1,000)",
    size = "Population in Millions"
  )

# Animation layers: step through the years, show the current year in the
# title, and leave a short trail behind each point. `year` is looked up in the
# plot data instead of being pulled from the global table with `$`.
p <- p +
  transition_time(year) +
  labs(title = "Year: {frame_time}") +
  shadow_wake(wake_length = 0.2, alpha = FALSE)

animate(p, fps = 4)
# Interactive plotly chart per continent: under-5 mortality against GDP per
# capita, animated over `year` (the `frame` / `ids` aesthetics are picked up by
# ggplotly()).
# NOTE(review): rows are also dropped when health expenditure is missing,
# although that column is not plotted here -- confirm this is intended.
p <- Full_dat_Wide %>%
  drop_na(
    `Mortality rate, under-5 (per 1,000)`,
    `Current health expenditure (% of GDP)`
  ) %>%
  ggplot(
    aes(
      x = `GDP per capita (constant 2015 US$)`,
      y = `Mortality rate, under-5 (per 1,000)`,
      color = Continent
    )
  ) +
  geom_point(
    aes(size = `Population, total`, frame = year, ids = `Country Name`)
  ) +
  #scale_x_log10()+
  facet_wrap(~Continent) +
  labs(title = "Mortality rate by GDP per capita")

ggplotly(p)
# Interactive plotly chart per continent: under-5 mortality against current
# health expenditure, animated over `year` (the `frame` / `ids` aesthetics are
# picked up by ggplotly()). Rows missing either plotted value are removed
# first.
p <- Full_dat_Wide %>%
  drop_na(
    `Mortality rate, under-5 (per 1,000)`,
    `Current health expenditure (% of GDP)`
  ) %>%
  ggplot(
    aes(
      x = `Current health expenditure (% of GDP)`,
      y = `Mortality rate, under-5 (per 1,000)`,
      color = Continent
    )
  ) +
  geom_point(
    aes(size = `Population, total`, frame = year, ids = `Country Name`)
  ) +
  facet_wrap(~Continent) +
  labs(title = "Mortality rate by current health expenditure (% of GDP)")

ggplotly(p)
Below is an attempt to functionalize the animated plots. I got close but couldn’t quite figure out how to get it to work correctly.
# Return `col` converted to integer when the name `var` ends in "Length";
# otherwise return `col` unchanged.
#
# var: a single name (string) to inspect.
# col: the values to convert or pass through.
name_change <- function(var, col) {
  suffix <- "Length"
  n_chars <- nchar(var)
  # Last nchar(suffix) characters of the name.
  var_tail <- substr(var, n_chars - nchar(suffix) + 1, n_chars)
  if (var_tail != suffix) {
    return(col)
  }
  as.integer(col)
}
#' Build an animated plotly scatter plot of two indicator columns.
#'
#' Points are sized by `Population, total`, animated over `year`, and tracked
#' across frames by `Country Name`. They are coloured by `Continent` when the
#' data has that column and by `Country Name` otherwise.
#'
#' @param Dataset Data frame containing `year`, `Country Name`,
#'   `Population, total` and the columns named by `XCol` and `YCol`.
#' @param XCol Name (string) of the column mapped to the x-axis.
#' @param YCol Name (string) of the column mapped to the y-axis.
#' @param facet `"Yes"` to draw one panel per `Continent` (ignored when the
#'   data has no `Continent` column); any other value gives a single panel.
#' @return The plotly object returned by `ggplotly()`.
animated_plotly <- function(Dataset, XCol, YCol, facet) {
  required_cols <- c(XCol, YCol, "Population, total", "year", "Country Name")
  missing_cols <- setdiff(required_cols, names(Dataset))
  if (length(missing_cols) > 0) {
    stop(
      "Dataset is missing column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  has_continent <- "Continent" %in% names(Dataset)
  colour_col <- if (has_continent) "Continent" else "Country Name"

  # The plotted columns may still be character, so convert exactly the ones
  # that are drawn (including the population used for point size) and drop
  # rows that have no x or y value.
  plot_data <- Dataset %>%
    mutate(across(all_of(c(XCol, YCol, "Population, total")), as.numeric)) %>%
    drop_na(all_of(c(XCol, YCol)))

  # `.data[[name]]` looks the column up by its string name; passing the bare
  # string to aes() would map the literal text instead of the column.
  # `frame` and `ids` are not ggplot2 aesthetics; ggplotly() uses them to
  # build the animation.
  p <- ggplot(
    plot_data,
    aes(
      x = .data[[XCol]],
      y = .data[[YCol]],
      color = .data[[colour_col]]
    )
  ) +
    geom_point(
      aes(size = `Population, total`, frame = year, ids = `Country Name`)
    ) +
    labs(x = XCol, y = YCol, color = colour_col)

  if (identical(facet, "Yes") && has_continent) {
    p <- p + facet_wrap(~Continent)
  }

  ggplotly(p)
}

animated_plotly(
  Full_Agg_Wide,
  "Current health expenditure (% of GDP)",
  "Mortality rate, under-5 (per 1,000)",
  "Yes"
)
## Rows: 350
## Columns: 15
## $ `Country Name` <chr> ~
## $ `Country Code` <chr> ~
## $ year <int> ~
## $ `Mortality rate, under-5 (per 1,000)` <dbl> ~
## $ `Population, total` <chr> ~
## $ `Inflation, GDP deflator (annual %)` <dbl> ~
## $ `Gross savings (% of GDP)` <dbl> ~
## $ `Gross domestic savings (% of GDP)` <dbl> ~
## $ `GDP per capita (constant 2015 US$)` <dbl> ~
## $ `GDP growth (annual %)` <dbl> ~
## $ `GDP (constant 2015 US$)` <dbl> ~
## $ `Current health expenditure (% of GDP)` <dbl> ~
## $ `Central government debt, total (% of GDP)` <dbl> ~
## $ `Unemployment, total (% of total labor force) (national estimate)` <dbl> ~
## $ `GDP per capita growth (annual %)` <dbl> ~